1 Effect of UPSTM-Based Decorrelation on Feature Discovery

1.0.1 Loading the libraries

library("FRESA.CAD")
library(readxl)
library(igraph)
library(umap)
library(tsne)
library(entropy)
library(psych)
library(whitening)
library("vioplot")
library("pls")

op <- par(no.readonly = TRUE)
pander::panderOptions('digits', 3)
pander::panderOptions('table.split.table', 400)
pander::panderOptions('keep.trailing.zeros',TRUE)

1.1 Material

Data Source https://www.kaggle.com/datasets/fedesoriano/body-fat-prediction-dataset

“Source The data were generously supplied by Dr. A. Garth Fisher who gave permission to freely distribute the data and use for non-commercial purposes.

Roger W. Johnson Department of Mathematics & Computer Science South Dakota School of Mines & Technology 501 East St. Joseph Street Rapid City, SD 57701

email address: web address: http://silver.sdsmt.edu/~rwjohnso

1.2 The Data

body_fat <- read.csv("~/GitHub/LatentBiomarkers/Data/BodyFat/BodyFat.csv", header=TRUE)

### Removing density as estimator
body_fat$Density <- NULL

#par(cex=0.5)

1.2.1 Standarize the names for the reporting

studyName <- "Body_Fat"
dataframe <- body_fat
outcome <- "BodyFat"

thro <- 0.40
cexheat <- 0.75
loops  <- 100

bfm <- filteredFit(BodyFat~.,body_fat,fitmethod=LASSO_MIN,filtermethod=NULL,DECOR=TRUE)
plot(body_fat$BodyFat~predict(bfm,body_fat))

bfm <- filteredFit(BodyFat~.,body_fat,fitmethod=LASSO_MIN,filtermethod=NULL)
plot(body_fat$BodyFat~predict(bfm,body_fat))

bfm <-  plsr(BodyFat~.,data=body_fat)
class(bfm)

[1] “mvr”

prd <- predict(bfm,newdata=body_fat)[,1,5]
plot(body_fat$BodyFat~prd)

bfm <-  pcr(BodyFat~.,data=body_fat)
class(bfm)

[1] “mvr”

prd <- predict(bfm,newdata=body_fat)[,1,5]
plot(body_fat$BodyFat~prd)

bfm <-  cppls(BodyFat~.,data=body_fat)
prd <- predict(bfm,newdata=body_fat)[,1,5]
plot(body_fat$BodyFat~prd)


bfm <-  GLMNET_RIDGE_MIN(BodyFat~.,data=body_fat)
prd <- predict(bfm,newdata=body_fat)
plot(body_fat$BodyFat~prd)

bfm <-  GLMNET_ELASTICNET_MIN(BodyFat~.,data=body_fat)
prd <- predict(bfm,newdata=body_fat)
plot(body_fat$BodyFat~prd)

1.3 Generaring the report

1.3.1 Libraries

Some libraries

library(psych)
library(whitening)
library("vioplot")
library("rpart")
library(multiColl)
library(car)
library("pls")

source("C:/Users/jtame/Documents/GitHub/LatentBiomarkers/RMD/RepeatedLinearCV.R")

1.3.2 Data specs

pander::pander(c(rows=nrow(dataframe),col=ncol(dataframe)-1))
rows col
252 13

varlist <- colnames(dataframe)
varlist <- varlist[varlist != outcome]

largeSet <- length(varlist) > 1500 

1.3.3 Training and testing sets

set.seed(1)
trainsamples <- sample(nrow(dataframe),3*nrow(dataframe)/4)

trainingset <- dataframe[trainsamples,]
testingset <- dataframe[-trainsamples,]

pander::pander(t(summary(trainingset)))
             
BodyFat Min. : 0.00 1st Qu.:13.60 Median :19.50 Mean :19.32 3rd Qu.:25.30 Max. :38.10
Age Min. :22.00 1st Qu.:35.00 Median :43.00 Mean :44.62 3rd Qu.:53.00 Max. :81.00
Weight Min. :118.5 1st Qu.:160.2 Median :177.0 Mean :180.1 3rd Qu.:198.5 Max. :363.1
Height Min. :29.50 1st Qu.:68.50 Median :70.25 Mean :70.23 3rd Qu.:72.25 Max. :77.75
Neck Min. :31.10 1st Qu.:36.40 Median :38.00 Mean :38.05 3rd Qu.:39.80 Max. :51.20
Chest Min. : 79.3 1st Qu.: 95.4 Median : 99.7 Mean :101.0 3rd Qu.:105.3 Max. :136.2
Abdomen Min. : 69.40 1st Qu.: 86.00 Median : 91.80 Mean : 93.05 3rd Qu.: 99.70 Max. :148.10
Hip Min. : 85.0 1st Qu.: 95.6 Median : 99.5 Mean :100.2 3rd Qu.:103.9 Max. :147.7
Thigh Min. :47.20 1st Qu.:56.30 Median :59.00 Mean :59.59 3rd Qu.:63.10 Max. :87.30
Knee Min. :33.00 1st Qu.:37.10 Median :38.50 Mean :38.65 3rd Qu.:40.10 Max. :49.10
Ankle Min. :19.10 1st Qu.:22.00 Median :22.90 Mean :23.13 3rd Qu.:24.00 Max. :33.90
Biceps Min. :24.80 1st Qu.:30.50 Median :32.10 Mean :32.42 3rd Qu.:34.60 Max. :45.00
Forearm Min. :21.00 1st Qu.:27.30 Median :28.80 Mean :28.66 3rd Qu.:30.10 Max. :33.70
Wrist Min. :15.80 1st Qu.:17.60 Median :18.30 Mean :18.24 3rd Qu.:18.80 Max. :21.40

varlist <- colnames(trainingset)
varlist <- varlist[varlist != outcome]

1.3.4 Correlation Matrix of the Data

The heat map of the data

par(op)


  par(cex=0.6,cex.main=0.85,cex.axis=0.7)
  cormat <- cor(testingset[,varlist],method="pearson")
  cormat[is.na(cormat)] <- 0
  gplots::heatmap.2(abs(cormat),
                    trace = "none",
  #                  scale = "row",
                    mar = c(5,5),
                    col=rev(heat.colors(5)),
                    main = "Original Correlation",
                    cexRow = cexheat,
                    cexCol = cexheat,
                     srtCol=45,
                     srtRow=45,
                    key.title=NA,
                    key.xlab="|Pearson Correlation|",
                    xlab="Feature", ylab="Feature")
  diag(cormat) <- 0
  pander::pander(max(abs(cormat)))

0.917

par(op)

1.3.5 Decorrelating using ILAA

ILAA bootstrapped training and testing sets

trainage_DE <- ILAA(trainingset,thr=thro,Outcome=outcome,verbose=TRUE)

fast | LM | Weight Age Weight Height Neck Chest Abdomen 0.07692308 1.00000000 0.15384615 0.53846154 0.84615385 0.76923077

Included: 13 , Uni p: 0.01153846 , Base Size: 1 , Rcrit: 0.1652337

1 <R=0.843,thr=0.900>, Top: 2< 1 >Fa= 2,<|><>Tot Used: 4 , Added: 2 , Zero Std: 0 , Max Cor: 0.898

2 <R=0.810,thr=0.800>, Top: 1< 5 >Fa= 2,<|><>Tot Used: 8 , Added: 5 , Zero Std: 0 , Max Cor: 0.747

3 <R=0.568,thr=0.700>, Top: 1< 1 >Fa= 2,<|><>Tot Used: 9 , Added: 1 , Zero Std: 0 , Max Cor: 0.611

4 <R=0.517,thr=0.600>, Top: 1< 1 >Fa= 2,<|><>Tot Used: 10 , Added: 1 , Zero Std: 0 , Max Cor: 0.594

5 <R=0.487,thr=0.500>, Top: 1< 1 >Fa= 2,<|><>Tot Used: 11 , Added: 1 , Zero Std: 0 , Max Cor: 0.487

6 <R=0.447,thr=0.400>, Top: 3< 1 >Fa= 4,<|><>Tot Used: 12 , Added: 4 , Zero Std: 0 , Max Cor: 0.384

7 <R=0.384,thr=0.400>

[ 7 ], 0.384463 Decor Dimension: 12 Nused: 12 . Cor to Base: 11 , ABase: 13 , Outcome Base: 0

#trainage_DE <- ILAA(trainingset,thr=thro,Outcome=outcome,verbose=TRUE,bootstrap=30)

testage_DE <- predictDecorrelate(trainage_DE,testingset)

1.3.6 The Formulas

Generating the formulas


theLaFormulas <- getLatentCoefficients(trainage_DE)

theCharformulas <- attr(theLaFormulas,"LatentCharFormulas")
pander::pander(as.matrix(theCharformulas))
La_Age + Age + (0.356)Weight - (1.395)Chest
La_Neck - (0.069)Weight + Neck
La_Chest - (0.255)Weight + Chest
La_Abdomen - (0.123)Weight - (0.695)Chest + Abdomen
La_Hip - (0.234)Weight + Hip
La_Thigh - (0.029)Weight - (0.550)Hip + Thigh
La_Knee - (0.072)Weight + Knee
La_Ankle - (0.036)Weight + Ankle
La_Biceps - (0.081)Weight + Biceps
La_Forearm - (9.93e-03)Weight - (0.391)Biceps + Forearm
La_Wrist - (0.024)Weight + Wrist

1.3.7 Formulas Network

Displaying the features associations

par(op)

  transform <- attr(trainage_DE,"UPLTM") != 0
  colnames(transform) <- str_remove_all(colnames(transform),"La_")
  transform <- abs(transform*cor(trainingset[,rownames(transform)])) # The weights are proportional to the observed correlation
  
  
  VertexSize <- attr(trainage_DE,"fscore") # The size depends on the variable independence relevance (fscore)
  names(VertexSize) <- str_remove_all(names(VertexSize),"La_")
  VertexSize <- 0.5+9.5*(VertexSize-min(VertexSize))/(max(VertexSize)-min(VertexSize)) # Normalization
  VertexSize <- VertexSize[colnames(transform)]
  gr <- graph_from_adjacency_matrix(transform,mode = "directed",diag = FALSE,weighted=TRUE)
  gr$layout <- layout_with_fr
  
  fc <- cluster_optimal(gr)
#          fc <- cluster_walktrap (gr,steps=50)

  plot(fc, gr,
       edge.width = 2*E(gr)$weight,
       vertex.size=VertexSize,
       edge.arrow.size=0.5,
       edge.arrow.width=0.75,
       vertex.label.color="purple",
#       vertex.label.cex=0.85,
#       vertex.label.dist=1.2,
       vertex.label.cex=(0.70 + 0.025*VertexSize),
       vertex.label.dist=(0.5 + 0.05*VertexSize),

       
       main="Feature Association")


par(op)

      varratios <- attr(trainage_DE,"VarRatio")
      names(varratios) <- str_remove_all(names(varratios),"La_")
      fscores <- attr(trainage_DE,"fscore") 
      names(fscores) <- str_remove_all(names(fscores),"La_")
      clustable <- as.data.frame(cbind(Variable=fc$names,
                                       Formula=as.character(theCharformulas[paste("La_",fc$names,sep="")]),
                                       Cluster=fc$membership,
                                       ResidualVariance=round(varratios[fc$names],3),
                                       Fscore=round(fscores[fc$names],3)
                                       )
                                 )
      rownames(clustable) <- str_replace_all(rownames(clustable),"__","_")
      clustable$Variable <- NULL
      clustable$Cluster <- as.integer(clustable$Cluster)
      clustable$ResidualVariance <- as.numeric(clustable$ResidualVariance)
      clustable$Fscore <- as.numeric(clustable$Fscore)
      clustable <- clustable[order(-clustable$Fscore),]
      clustable <- clustable[order(-clustable$ResidualVariance),]
      clustable <- clustable[order(clustable$Cluster),]

pander::pander(as.matrix(clustable))
  Formula Cluster ResidualVariance Fscore
Age + Age + (0.356)Weight - (1.395)Chest 1 0.831 -1
Chest - (0.255)Weight + Chest 1 0.193 2
Abdomen - (0.123)Weight - (0.695)Chest + Abdomen 1 0.133 -2
Weight NA 2 1.000 9
Ankle - (0.036)Weight + Ankle 2 0.648 -1
Wrist - (0.024)Weight + Wrist 2 0.442 -1
Neck - (0.069)Weight + Neck 2 0.311 -1
Knee - (0.072)Weight + Knee 2 0.259 -1
Thigh - (0.029)Weight - (0.550)Hip + Thigh 3 0.183 -2
Hip - (0.234)Weight + Hip 3 0.102 0
Forearm - (9.93e-03)Weight - (0.391)Biceps + Forearm 4 0.511 -2
Biceps - (0.081)Weight + Biceps 4 0.347 0

1.3.8 Correlation Matrix of the Data

The heat map of the ILAA transformed data

par(op)

varlistDe <- colnames(trainage_DE)
varlistDe <- varlistDe[varlistDe != outcome]

  par(cex=0.6,cex.main=0.85,cex.axis=0.7)

# Training    
  cormat <- cor(trainage_DE[,varlistDe],method="pearson")
  cormat[is.na(cormat)] <- 0
  gplots::heatmap.2(abs(cormat),
                    trace = "none",
                    mar = c(5,5),
                    col=rev(heat.colors(5)),
                    main = "Training: After ILAA Correlation",
                    cexRow = cexheat,
                    cexCol = cexheat,
                     srtCol=45,
                     srtRow=45,
                    key.title=NA,
                    key.xlab="|Pearson Correlation|",
                    xlab="Feature", ylab="Feature")
  diag(cormat) <- 0
  pander::pander(max(abs(cormat)))

0.384


  par(op)

# Testing

  cormat <- cor(testage_DE[,varlistDe],method="pearson")
  cormat[is.na(cormat)] <- 0
  gplots::heatmap.2(abs(cormat),
                    trace = "none",
                    mar = c(5,5),
                    col=rev(heat.colors(5)),
                    main = "Testing: After ILAA Correlation",
                    cexRow = cexheat,
                    cexCol = cexheat,
                     srtCol=45,
                     srtRow=45,
                    key.title=NA,
                    key.xlab="|Pearson Correlation|",
                    xlab="Feature", ylab="Feature")

  diag(cormat) <- 0
  pander::pander(max(abs(cormat)))

0.54


  par(op)

  

1.3.9 Modeling

1.3.9.1 Modeling outcome using raw set

outcomeModel <- LASSO_1SE(formula(paste(outcome,"~.")),trainingset);
predOutcome <- predict(outcomeModel,testingset)
pander::pander(as.matrix(outcomeModel$coef))
(Intercept) -23.4024
Age 0.0321
Height -0.0931
Abdomen 0.5748
Wrist -0.3093

1.3.9.2 Modeling outcome using decorrelated set

outcomeModel_DE <- LASSO_1SE(formula(paste(outcome,"~.")),trainage_DE);
predOutcome_DE <- predict(outcomeModel_DE,testage_DE)

pander::pander(as.matrix(outcomeModel_DE$coef),caption="ILAA Coef")
ILAA Coef
(Intercept) -26.0122
Weight 0.1265
La_Chest 0.4000
La_Abdomen 0.8502
La_Thigh 0.0252
obsCoef <- getObservedCoef(trainage_DE,outcomeModel_DE)
pander::pander(as.matrix(obsCoef$coef),caption="ILAA Modeling")
ILAA Modeling
(Intercept) -26.0122
Weight -0.0810
Chest -0.1910
Abdomen 0.8502
Hip -0.0138
Thigh 0.0252


pander::pander(cor.test(predOutcome,testingset[,outcome]),caption="Raw Model")
Raw Model
Test statistic df P value Alternative hypothesis cor
12.2 61 5.09e-18 * * * two.sided 0.842
pander::pander(cor.test(predOutcome_DE,testage_DE[,outcome]),caption="ILAA-based Model")
ILAA-based Model
Test statistic df P value Alternative hypothesis cor
12.6 61 1.16e-18 * * * two.sided 0.85

1.3.9.3 Univariate t-test


rawunittvalues <- apply(as.matrix(testingset[,names(outcomeModel$coef)[-1]]),2,tvals,testingset[,outcome])
names(rawunittvalues) <- names(outcomeModel$coef)[-1]

deunittvalues <- apply(testage_DE[,names(outcomeModel_DE$coef)[-1]],2,tvals,testingset[,outcome])

1.3.9.4 Comparing summaries


psig <- 0.1/(ncol(testingset)-1)
lmod <- lm(paste(outcome,"~."),testingset[,c(outcome,names(outcomeModel$coef)[-1])])
try(vifx <-vif(lm(paste(outcome,"~."),testingset[,c(outcome,names(outcomeModel$coef)[-1])])))
sm <- summary(lmod)
if (length(lmod$coef)>10)
{
  sm$coefficients[1,4] <- 1.0
  gcoef <- lmod$coef[sm$coefficients[,4]<psig]
  lmod <- lm(paste(outcome,"~."),testingset[,c(outcome,names(gcoef))])
  try(vifx <-vif(lm(paste(outcome,"~."),testingset[,c(outcome,names(gcoef))])))
}



sm <- summary(lmod)
smcoef <- as.data.frame(sm$coefficients)
smcoef <- smcoef[order(-abs(smcoef[,3])),]
smcoef$Uni_t_values <- rawunittvalues[rownames(smcoef)]
if (!inherits(vif,"try-error")) smcoef$vif <-vifx[rownames(smcoef)]
smcoef <- smcoef[!is.na(smcoef$Uni_t_values),]
if (nrow(smcoef)>10) smcoef <- smcoef[smcoef[,4]<psig,]

pander::pander(smcoef)
  Estimate Std. Error t value Pr(>|t|) Uni_t_values vif
Abdomen 0.7784 0.0633 12.301 8.37e-18 11.12 1.38
Height -0.6339 0.2766 -2.292 2.56e-02 -1.88 1.70
Wrist -1.9320 0.8755 -2.207 3.13e-02 1.64 1.87
Age 0.0385 0.0549 0.701 4.86e-01 2.23 1.49
pander::pander(t(c(R2=sm$r.squared,adj_R2=sm$adj.r.squared)))
R2 adj_R2
0.768 0.752
pander::pander(c(numvar=nrow(smcoef)))
numvar
4


lmod_DE <- lm(paste(outcome,"~."),testage_DE[,c(outcome,names(outcomeModel_DE$coef)[-1])])
try(vifx <-vif(lm(paste(outcome,"~."),testage_DE[,c(outcome,names(outcomeModel_DE$coef)[-1])])))

sm <- summary(lmod_DE)
if (length(lmod_DE$coef)>10)
{
  sm$coefficients[1,4] <- 1.0
  gcoef <- lmod_DE$coef[sm$coefficients[,4]<psig]
  lmod_DE <- lm(paste(outcome,"~."),testage_DE[,c(outcome,names(gcoef))])
  try(vifx <-vif(lm(paste(outcome,"~."),testage_DE[,c(outcome,names(gcoef))])))
}

sm <- summary(lmod_DE)
lacoef <- as.data.frame(sm$coefficients)
lacoef <- lacoef[order(-abs(lacoef[,3])),]
lacoef$Uni_t_values <- deunittvalues[rownames(lacoef)]
if (!inherits(vifx,"try-error")) lacoef$vif <-vifx[rownames(lacoef)]
lacoef <- lacoef[!is.na(lacoef$Uni_t_values),]
if (nrow(lacoef)>10) lacoef <- lacoef[lacoef[,4]<psig,]

lacoef$formula <- theCharformulas[rownames(lacoef)]
lacoef$VarRatio <- varratios[str_remove_all(rownames(lacoef),"La_")]

pander::pander(lacoef)
  Estimate Std. Error t value Pr(>|t|) Uni_t_values vif formula VarRatio
Weight 0.1570 0.0206 7.623 2.63e-10 4.88 1.00 NA 1.000
La_Abdomen 0.9196 0.1398 6.576 1.50e-08 5.62 1.10 - (0.123)Weight - (0.695)Chest + Abdomen 0.133
La_Chest 0.9209 0.1634 5.637 5.36e-07 5.34 1.22 - (0.255)Weight + Chest 0.193
La_Thigh -0.0345 0.2729 -0.126 9.00e-01 -1.79 1.14 - (0.029)Weight - (0.550)Hip + Thigh 0.183
pander::pander(t(c(R2=sm$r.squared,adj_R2=sm$adj.r.squared)))
R2 adj_R2
0.755 0.738
pander::pander(c(numvar=nrow(lacoef)))
numvar
4


xvals <-c(min(c(deunittvalues,rawunittvalues))-3,max(c(deunittvalues,rawunittvalues))+3)

par(mfrow=c(1,2),cex=0.5)

plot(smcoef[,c(3,5)],
     main="Raw: Univariate t-values vs regression t-values",
     xlim=xvals,
     ylim=xvals
     )

lmtvals <- lm(smcoef[,5]~smcoef[,3])
pred <- lmtvals$coefficients[1] + lmtvals$coefficients[2] * xvals
lines(x=xvals,y=pred,col="red")
text(xvals[1]+(xvals[2]-xvals[1])/2,xvals[2]-1,sprintf("Slope= %.2f",lmtvals$coefficients[2]))


plot(lacoef[-1,c(3,5)],
     main="ILAA: Univariate t-values vs regression t-values",
     xlim=xvals,
     ylim=xvals
     )

lmtvals <- lm(lacoef[,5]~lacoef[,3])
pred <- lmtvals$coefficients[1] + lmtvals$coefficients[2] * xvals
lines(x=xvals,y=pred,col="red")
text(xvals[1]+(xvals[2]-xvals[1])/2,xvals[2]-1,sprintf("Slope= %.2f",lmtvals$coefficients[2]))


#pander::pander(summary(lmtvals))


pander::pander(cor.test(smcoef[,3],smcoef[,5]))
Pearson’s product-moment correlation: smcoef[, 3] and smcoef[, 5]
Test statistic df P value Alternative hypothesis cor
5.35 2 0.0332 * two.sided 0.967

pander::pander(cor.test(lacoef[,3],lacoef[,5]))
Pearson’s product-moment correlation: lacoef[, 3] and lacoef[, 5]
Test statistic df P value Alternative hypothesis cor
4.6 2 0.0441 * two.sided 0.956

par(op)

1.3.9.5 Ploting predictions

par(mfrow=c(1,3),cex=0.5)
plot(lmod$fitted.values,predOutcome,main="Raw: lm train predict vs. test predict",xlab="Train",ylab="Test")
plot(lmod_DE$fitted.values,predOutcome_DE,main="ILAA: lm train predict vs. test predict",xlab="Train",ylab="Test")

plot(predOutcome,predOutcome_DE,xlab="Raw Predicted",ylab="ILAA Predicted",main="Raw vs. ILAA")


par(op)

1.3.10 CV

1.3.10.1 test Correlations

par(op)
corresults <- CV_IDeA(dataframe,outcome,loops=loops)

………. ………. ………. ………. ………. ………. ………. ………. ………. ……….


mintvals <- min(c(min(corresults$rawtValues),min(corresults$detValues)))
maxvals <- max(c(max(corresults$rawtValues),max(corresults$detValues)))
xvals <- c(mintvals,maxvals)

vioplot(list(raw=corresults$testRawCorrelations,ILAA=corresults$testDeCorrelations),
        ylab="Pearson Correlation",
        main="Test Correlations")


pander::pander(t.test(corresults$testDeCorrelations,corresults$testRawCorrelations,paired=TRUE))
Paired t-test: corresults$testDeCorrelations and corresults$testRawCorrelations
Test statistic df P value Alternative hypothesis mean difference
3.37 99 0.00108 * * two.sided 0.0048

sylim <- c(1,min(c(20,max(corresults$VIFRaw))))
vioplot(list(raw=corresults$VIFRaw,ILAA=corresults$VIFDe),
        ylab="VIF",
        ylim=sylim,
        main="Test VIF")



pander::pander(summary(cbind(raw=corresults$VIFRaw,ILAA=corresults$VIFDe)))
raw ILAA
Min. : 1.000 Min. :1.001
1st Qu.: 1.928 1st Qu.:1.131
Median : 2.539 Median :1.323
Mean : 4.752 Mean :1.642
3rd Qu.: 3.707 3rd Qu.:1.787
Max. :43.687 Max. :5.264
summary(corresults$VIFRaw)

Min. 1st Qu. Median Mean 3rd Qu. Max. 1.000 1.928 2.539 4.752 3.707 43.687

1.3.11 The t-values


par(op)
par(mfrow=c(1,2),cex=0.5)
plot(corresults$rawtValues,
     main="Raw: Univariate t-values vs Model t-values",
     xlab="Univariate",
     ylab="Model",
     xlim=xvals,
     ylim=xvals)

lmtvals <- lm(Model~.,corresults$rawtValues)
pred <- lmtvals$coefficients[1] + lmtvals$coefficients[2] * xvals
lines(x=xvals,y=pred,col="red")
text(xvals[1]+(xvals[2]-xvals[1])/2,xvals[2]-1,sprintf("Slope= %.2f",lmtvals$coefficients[2]))

pander::pander(summary(lmtvals))
  Estimate Std. Error t value Pr(>|t|)
(Intercept) -2.122 0.146 -14.6 3.46e-40
Uni 0.912 0.025 36.5 6.21e-141
Fitting linear model: Model ~ .
Observations Residual Std. Error \(R^2\) Adjusted \(R^2\)
486 2.27 0.733 0.733

plot(corresults$detValues,
      main="ILAA: Univariate t-values vs Model t-values",
     xlab="Univariate",
     ylab="Model",
     xlim=xvals,
     ylim=xvals)

lmtvals <- lm(Model~.,corresults$detValues)
pred <- lmtvals$coefficients[1] + lmtvals$coefficients[2] * xvals
lines(x=xvals,y=pred,col="red")
text(xvals[1]+(xvals[2]-xvals[1])/2,xvals[2]-1,sprintf("Slope= %.2f",lmtvals$coefficients[2]))


pander::pander(summary(lmtvals))
  Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.09 0.1159 9.4 2.54e-19
Uni 1.12 0.0295 38.0 9.00e-144
Fitting linear model: Model ~ .
Observations Residual Std. Error \(R^2\) Adjusted \(R^2\)
461 1.9 0.759 0.758

1.4 PCA, EFA, PLS, ERT


toPCA <- sapply(apply(dataframe,2,unique),length) >= 5 & colnames(dataframe) != outcome

pc <- prcomp(dataframe[,toPCA],center = TRUE,scale. = TRUE,tol=0.01)   #principal components

if (ncol(dataframe)<20)
{
pander::pander(as.data.frame(pc$rotation),caption="PCA")
}
PCA
  PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10 PC11 PC12 PC13
Age 0.00985 0.75070 -0.419893 0.07900 0.04005 0.2939 -0.03363 -0.2071 -0.15238 -0.26178 0.0306 0.16565 -0.04106
Weight 0.34454 -0.01806 0.039220 0.08660 -0.14167 -0.0310 0.07599 0.0465 0.06070 0.01896 -0.2058 0.19013 -0.87200
Height 0.10114 -0.46854 -0.677696 0.08215 -0.48497 0.1152 0.13421 -0.1019 0.00463 -0.12419 0.0619 -0.00815 0.08952
Neck 0.30559 0.08957 -0.120770 -0.20642 -0.05487 -0.5607 0.00685 0.1147 -0.70288 0.04784 -0.0729 -0.01296 0.09503
Chest 0.31614 0.20916 0.061085 -0.00879 -0.15233 -0.0702 0.45007 0.0608 0.24786 0.43054 0.3976 0.40910 0.21413
Abdomen 0.31180 0.26462 0.122093 0.11998 -0.22918 0.0331 0.29455 0.0862 0.14014 -0.08630 -0.0367 -0.79066 0.04559
Hip 0.32586 0.00309 0.220733 0.17752 -0.16272 0.0451 -0.04918 0.1010 0.13409 -0.32280 -0.6086 0.34279 0.40291
Thigh 0.31009 -0.12318 0.321658 0.07652 -0.09620 0.0620 -0.27274 -0.0409 -0.11467 -0.52247 0.6312 0.07313 -0.01778
Knee 0.30830 -0.04964 -0.000818 0.24672 -0.00528 0.4967 -0.44308 0.1367 -0.27248 0.54033 -0.0127 -0.08786 0.07582
Ankle 0.23053 -0.22414 -0.127712 0.50044 0.67906 -0.0320 0.34680 -0.1669 -0.10614 -0.08013 0.0112 -0.02363 0.03489
Biceps 0.29934 -0.04912 0.075586 -0.32234 0.03479 -0.0451 -0.14989 -0.8446 0.12914 0.14712 -0.1099 -0.08073 0.04901
Forearm 0.24974 -0.13407 -0.070634 -0.68291 0.30995 0.4465 0.22753 0.2734 -0.03492 -0.15276 -0.0428 -0.00493 0.00191
Wrist 0.27913 0.08084 -0.387886 -0.05891 0.26499 -0.3522 -0.46699 0.2717 0.51325 -0.00721 0.0714 -0.07743 0.03044

rotstd <- log10(abs(100*pc$rotation)+1.0)
  gplots::heatmap.2(rotstd,
                    trace = "none",
                    dendrogram="none",
                    breaks=c(0,0.5,1,2,3),
#                    scale="row",
                    mar = c(5,5),
                    col=rainbow(4),
                    main = "PCA Rotation",
                    cexRow = cexheat,
                    cexCol = cexheat,
                    Rowv=FALSE,
                    Colv=FALSE,
                   srtCol=45,
                   srtRow=45,
                    key.title=NA,
                    key.xlab="log(|100Rot|+1)",
                    xlab="Output Feature", ylab="Input Feature")



efa <- try(fa(dataframe[,toPCA],ncol(pc$rotation),rotate="varimax",warnings=FALSE))  # EFA analysis

#if (!inherits(efa,"try-error"))
#{
  if (ncol(dataframe)<20)
  {
   pander::pander(as.data.frame(efa$weights),caption="EFA")
#    lds <- efa$weights
#    lds2 <- matrix(as.numeric(lds),nrow=nrow(lds),ncol=ncol(lds))
#    colnames(lds2) <- colnames(lds)
#    rownames(lds2) <- rownames(lds)
#    pander::pander(lds2,caption="EFA")
  }
EFA
  MR1 MR4 MR7 MR2 MR3 MR6 MR5 MR8 MR11 MR10 MR9 MR12 MR13
Age 0.00767 -0.0519 -0.0374 0.5374 -0.0940 0.1465 -0.0691 0.3735 -0.13322 0.0782 0.2238 0.3874 9.24e-16
Weight 0.29780 -0.3989 0.2135 -0.1412 2.4796 -1.2149 -1.7169 0.6369 0.25233 1.0939 0.2345 0.9460 1.86e-14
Height 0.01918 -0.0650 -0.1622 -0.0413 0.2954 0.0365 0.0882 0.0550 -0.01552 -0.1574 -0.0284 -0.1062 -1.90e-15
Neck -0.02601 0.2370 -0.2474 0.0597 -0.0465 0.0447 0.1995 -0.1173 -0.18373 0.7445 -0.0408 -0.0268 -1.88e-15
Chest 0.12731 0.2660 -0.0390 -0.0686 -0.6721 0.0367 0.2428 -0.5590 1.71371 -0.2069 0.2119 0.4767 -3.77e-15
Abdomen 0.48727 -0.2259 -0.2667 0.5718 -0.1819 -0.3868 -0.3698 0.1409 -0.55684 -0.4228 -1.0396 -1.9006 -1.91e-15
Hip 0.48461 -0.3702 -0.0894 -0.3197 -0.9498 0.2950 1.2873 -1.7477 -1.59876 -0.7593 0.6947 1.1673 -8.54e-15
Thigh 0.12102 0.0505 -0.0853 -0.5021 -0.6365 0.3236 0.4616 2.0758 0.59891 0.2457 -0.1960 -0.2491 4.02e-16
Knee -0.15404 -0.0329 0.3065 0.2623 0.1378 1.4142 -0.3833 -0.3405 0.17064 -0.0954 -0.1017 -0.2219 -1.44e-15
Ankle -0.08435 -0.0656 0.5510 -0.0511 -0.1519 -0.1641 -0.1428 -0.0308 0.00159 -0.0111 -0.0337 -0.1002 -7.79e-16
Biceps -0.11642 0.5379 -0.1024 0.0695 -0.1208 -0.0719 -0.3077 0.0332 -0.24585 -0.2367 0.5555 -0.0625 -1.06e-15
Forearm -0.12353 0.5231 -0.1039 -0.0924 -0.0942 0.0797 -0.0999 -0.2008 -0.15371 -0.1667 -0.2532 -0.0227 -1.67e-16
Wrist -0.26733 0.1966 0.4301 0.2822 0.1018 -0.1746 1.0769 0.1301 0.00231 -0.2285 -0.1205 -0.1936 -7.43e-16
  rotstd <- log10(abs(100*efa$weights)+1.0)
    gplots::heatmap.2(rotstd,
                      trace = "none",
                      dendrogram="none",
                      breaks=c(0,0.5,1,2,3),
  #                    scale="row",
                      mar = c(5,5),
                      col=rainbow(4),
                      main = "EFA weights",
                      cexRow = cexheat,
                      cexCol = cexheat,
                      Rowv=FALSE,
                      Colv=FALSE,
                     srtCol=45,
                     srtRow=45,
                      key.title=NA,
                      key.xlab="log(|100W|+1)",
                      xlab="Output Feature", ylab="Input Feature")

#}

  
  
plm <- plsr(formula=formula(paste(outcome,"~.")),data=dataframe,scale =TRUE)
if (ncol(dataframe)<20)
{
  lds <- plm$loadings
  lds2 <- matrix(as.numeric(lds),nrow=nrow(lds),ncol=ncol(lds))
  colnames(lds2) <- colnames(lds)
  rownames(lds2) <- rownames(lds)
  pander::pander(lds2,caption="PLS")
}
PLS
  Comp 1 Comp 2 Comp 3 Comp 4 Comp 5 Comp 6 Comp 7 Comp 8 Comp 9 Comp 10 Comp 11 Comp 12 Comp 13
Age 0.0384 0.57608 -0.6875 0.7478 -0.41789 0.00471 -0.0761 0.10245 -0.37453 0.01428 0.1080 0.2376 -0.28062
Weight 0.3562 -0.08577 0.1436 -0.0683 -0.20422 -0.00403 -0.0936 -0.04584 0.02295 -0.20848 -0.2603 -0.2984 -0.06567
Height 0.0760 -0.62143 0.2716 0.5959 -0.92161 0.40989 0.0618 -0.14581 -0.00432 0.00165 0.0509 0.1227 -0.07420
Neck 0.3161 -0.08098 -0.3655 -0.0146 -0.13866 0.14203 0.2369 -0.12786 0.53456 -0.87736 0.6928 -0.1367 -0.13499
Chest 0.3381 0.15815 0.1224 0.1010 -0.17174 -0.21335 -0.0436 -0.09300 0.44268 -0.14630 -0.2084 0.1896 0.47454
Abdomen 0.3379 0.25099 0.2404 0.1292 0.01884 0.33627 -0.0425 0.00247 0.16578 0.04487 -0.0127 -0.2395 0.06280
Hip 0.3408 0.00303 0.2523 -0.2796 -0.15177 0.00364 -0.1979 -0.05027 -0.12750 0.58828 0.2592 -0.3429 -0.12910
Thigh 0.3195 -0.09734 0.2962 -0.4286 0.17340 0.18765 -0.0593 -0.14376 -0.52426 -0.26922 0.2559 0.3428 0.00953
Knee 0.3161 -0.13333 0.0318 -0.1155 -0.23851 0.06294 0.1724 0.70035 -0.51326 0.22642 -0.1638 0.0435 0.09049
Ankle 0.2245 -0.35117 -0.0426 0.1850 0.47509 -0.27208 -0.8964 0.62422 0.03005 -0.20293 0.1301 0.1241 -0.15447
Biceps 0.3067 -0.13624 -0.0459 -0.0987 0.23737 -0.12850 0.2999 -0.27227 0.23005 0.33561 -0.5523 0.8472 -0.66102
Forearm 0.2489 -0.25932 -0.0386 0.2871 0.59972 -0.71664 0.6784 -0.30636 -0.21534 0.04722 0.2028 -0.3232 0.16145
Wrist 0.2828 -0.20726 -0.7423 0.2230 -0.00939 0.28827 -0.1485 -0.15622 -0.06604 0.46004 -0.3119 -0.2325 0.39020

loadadings <- log10(abs(100*plm$loadings) + 1.0)
  gplots::heatmap.2(loadadings,
                    breaks=c(0,0.5,1,2,3),
                    trace = "none",
                    dendrogram="none",
#                    scale="row",
                    mar = c(5,5),
                    col=rainbow(4),
                    main = "PLS Loadings",
                    cexRow = cexheat,
                    cexCol = cexheat,
                    Rowv=FALSE,
                    Colv=FALSE,
                   srtCol=45,
                   srtRow=45,
                    key.title=NA,
                    key.xlab="log(|100Beta|+1)",
                    xlab="Output Feature", ylab="Input Feature")



ERTmod <- ILAA(dataframe,Outcome = outcome,thr=thro)

ERT <- log10(abs(100*attr(ERTmod,"UPLTM")) + 1);
  gplots::heatmap.2(ERT,
                    trace = "none",
                    breaks=c(0,0.5,1,2,3),
                    mar = c(5,5),
                    col=rainbow(4),
                    main = "ERT Rotation",
                    cexRow = cexheat,
                    cexCol = cexheat,
                    dendrogram="none",
                    Rowv=FALSE,
                    Colv=FALSE,
                   srtCol=45,
                   srtRow=45,
                    key.title=NA,
                    key.xlab="log(|100Beta|+1)",
                    xlab="Output Feature", ylab="Input Feature")


if (ncol(dataframe)<20)
{
pander::pander(attr(ERTmod,"UPLTM"),caption="ERT")
}
ERT
  La_Age Weight La_Neck La_Chest La_Abdomen La_Hip La_Thigh La_Knee La_Ankle La_Biceps La_Forearm La_Wrist
Age 1.000 0 0.0000 0.000 0.00 0.000 0.0000 0.00 0.0000 0.0000 0.0000 0.0000
Weight 0.359 1 -0.0687 -0.257 0.00 -0.229 -0.0394 -0.07 -0.0354 -0.0823 -0.0433 -0.0232
Neck 0.000 0 1.0000 0.000 0.00 0.000 0.0000 0.00 0.0000 0.0000 0.0000 0.0000
Chest -1.401 0 0.0000 1.000 -1.17 0.000 0.0000 0.00 0.0000 0.0000 0.0000 0.0000
Abdomen 0.000 0 0.0000 0.000 1.00 0.000 0.0000 0.00 0.0000 0.0000 0.0000 0.0000
Hip 0.000 0 0.0000 0.000 0.00 1.000 -0.5050 0.00 0.0000 0.0000 0.0000 0.0000
Thigh 0.000 0 0.0000 0.000 0.00 0.000 1.0000 0.00 0.0000 0.0000 0.0000 0.0000
Knee 0.000 0 0.0000 0.000 0.00 0.000 0.0000 1.00 0.0000 0.0000 0.0000 0.0000
Ankle 0.000 0 0.0000 0.000 0.00 0.000 0.0000 0.00 1.0000 0.0000 0.0000 0.0000
Biceps 0.000 0 0.0000 0.000 0.00 0.000 0.0000 0.00 0.0000 1.0000 0.0000 0.0000
Forearm 0.000 0 0.0000 0.000 0.00 0.000 0.0000 0.00 0.0000 0.0000 1.0000 0.0000
Wrist 0.000 0 0.0000 0.000 0.00 0.000 0.0000 0.00 0.0000 0.0000 0.0000 1.0000

1.5 U-MAP Visualization of features

1.5.1 The UMAP on Raw Data

  thesamples <- c(1:nrow(dataframe));
  if (nrow(dataframe)>2000) 
  {
    thesamples <- sample(thesamples,2000)
  }

  classes <- as.integer(scale(dataframe[thesamples,outcome]))
  classes <- classes - min(classes) + 1
  raincolors <- heat.colors(length(unique(classes)))
  dtatoplot <- as.matrix(FRESAScale(dataframe[thesamples,],method="OrderLogit")$scaledData)
  datasetframe.umap = umap(dtatoplot,n_components=2)
  plot(datasetframe.umap$layout,xlab="U1",ylab="U2",main="UMAP: RAW",col=raincolors[classes],pch=15)

  
    gplots::heatmap.2(dtatoplot,
                    trace = "none",
                    mar = c(5,5),
                    col=heat.colors(5),
                    main = "Raw",
                    cexRow = cexheat,
                    cexCol = cexheat,
                   srtCol=45,
                   srtRow=45,
                    key.title=NA,
                    key.xlab="Z",
                    xlab="Feature", ylab="Subject")




  dtatoplot <- as.matrix(FRESAScale(predict(pc,dataframe[thesamples,]),method="OrderLogit")$scaledData)
  datasetframe.umap = umap(dtatoplot,n_components=2)
  plot(datasetframe.umap$layout,xlab="U1",ylab="U2",main="UMAP: PCA",col=raincolors[classes],pch=15)


    gplots::heatmap.2(dtatoplot,
                    trace = "none",
                    mar = c(5,5),
                    col=heat.colors(5),
                    main = "PCA",
                    cexRow = cexheat,
                    cexCol = cexheat,
                   srtCol=45,
                   srtRow=45,
                    key.title=NA,
                    key.xlab="Z",
                    xlab="Feature", ylab="Subject")


  
if (!inherits(efa,"try-error"))
{
  dtatoplot <- as.matrix(FRESAScale(predict(efa,dataframe[thesamples,toPCA]),method="OrderLogit")$scaledData)
  datasetframe.umap = umap(dtatoplot,n_components=2)
  plot(datasetframe.umap$layout,xlab="U1",ylab="U2",main="UMAP: EFA",col=raincolors[classes],pch=15)
    gplots::heatmap.2(dtatoplot,
                    trace = "none",
                    mar = c(5,5),
                    col=heat.colors(5),
                    main = "EFA",
                    cexRow = cexheat,
                    cexCol = cexheat,
                   srtCol=45,
                   srtRow=45,
                    key.title=NA,
                    key.xlab="Z",
                    xlab="Feature", ylab="Subject")
}

  rotframe <- as.matrix(scale(dataframe[thesamples,rownames(plm$loadings)])) %*% plm$loadings
  
  dtatoplot <- as.matrix(FRESAScale(rotframe,method="OrderLogit")$scaledData)
  datasetframe.umap = umap(dtatoplot,n_components=2)
  
  plot(datasetframe.umap$layout,xlab="U1",ylab="U2",main="UMAP: PLS",col=raincolors[classes],pch=15)

      gplots::heatmap.2(dtatoplot,
                    trace = "none",
                    mar = c(5,5),
                    col=heat.colors(5),
                    main = "PLS",
                    cexRow = cexheat,
                    cexCol = cexheat,
                   srtCol=45,
                   srtRow=45,
                    key.title=NA,
                    key.xlab="Z",
                    xlab="Feature", ylab="Subject")



  dtatoplot <- as.matrix(FRESAScale(ERTmod[thesamples,colnames(ERTmod) != outcome],method="OrderLogit")$scaledData)
  datasetframe.umap = umap(dtatoplot,n_components=2)
  plot(datasetframe.umap$layout,xlab="U1",ylab="U2",main="UMAP: ERT",col=raincolors[classes],pch=15)

      gplots::heatmap.2(dtatoplot,
                    trace = "none",
                    mar = c(5,5),
                    col=heat.colors(5),
                    main = "ERT",
                    cexRow = cexheat,
                    cexCol = cexheat,
                   srtCol=45,
                   srtRow=45,
                    key.title=NA,
                    key.xlab="Z",
                    xlab="Feature", ylab="Subject")

1.6 The Body Fat plots

Final plots


plot(predOutcome,testingset[,outcome],xlab="Raw Predicted",ylab=outcome,main="Raw Fat Prediction")

plot(predOutcome_DE,testingset[,outcome],xlab="IDeA Predicted",ylab=outcome,main="IDeA Fat Prediction")